package com.play;

import org.xml.sax.SAXException;

import edu.uci.ics.crawler4j.parser.ExtractedUrlAnchorPair;
import edu.uci.ics.crawler4j.parser.HtmlContentHandler;
/**
 * HTML content handler for Aimu pages.
 *
 * <p>Capturing of body text, anchor text and outgoing URLs is switched on and
 * off by a marker text chunk (see {@link #TOGGLE_MARKER}): each time a chunk
 * equal to the marker is reported to {@link #characters(char[], int, int)},
 * the capturing state is flipped. While capturing is off, text is not
 * appended and outgoing URLs are not forwarded to the parent handler.
 *
 * @author kongxp
 */
public class AimuHandler extends HtmlContentHandler {

	/** Text chunk that flips the capturing state; the label means "previous page". */
	private static final String TOGGLE_MARKER = "上一页";

	/** {@code true} while text and outgoing URLs are being captured. */
	private boolean isContent = false;

	/**
	 * Forwards the URL to the parent handler only while capturing is on,
	 * printing its href to standard output first. URLs reported while
	 * capturing is off are dropped.
	 *
	 * @param url the URL/anchor pair reported by the parent handler
	 */
	@Override
	protected void addOutgoingUrls(ExtractedUrlAnchorPair url) {
		if (!isContent) {
			return;
		}
		System.out.println(url.getHref());
		super.addOutgoingUrls(url);
	}

	/**
	 * Receives a chunk of character data, flips the capturing state when the
	 * chunk equals the marker, and appends the chunk to the body text (and to
	 * the anchor text when {@code anchorFlag} is set) while capturing is on.
	 *
	 * <p>The state is flipped before the append decision, so the marker chunk
	 * that turns capturing on is itself appended, while the one that turns it
	 * off is not.
	 *
	 * @param ch     buffer holding the characters
	 * @param start  index of the first character of the chunk in {@code ch}
	 * @param length number of characters in the chunk
	 * @throws SAXException declared by the overridden callback; not thrown here
	 */
	@Override
	public void characters(char[] ch, int start, int length) throws SAXException {
		final String chunk = new String(ch, start, length);

		// NOTE(review): the marker only matches when it arrives as one exact
		// chunk (no surrounding whitespace, not split across calls) - confirm
		// this holds for the pages being crawled.
		if (TOGGLE_MARKER.equals(chunk)) {
			isContent = !isContent;
		}
		if (!isContent) {
			return;
		}

		bodyText.append(ch, start, length);
		if (anchorFlag) {
			anchorText.append(chunk);
		}
	}
}
